{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Configuring pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# import numpy and pandas\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# used for dates\n",
    "import datetime\n",
    "from datetime import datetime, date\n",
    "\n",
    "# Set some pandas options controlling output format\n",
    "pd.set_option('display.notebook_repr_html', False)\n",
    "pd.set_option('display.max_columns', 8)\n",
    "pd.set_option('display.max_rows', 10)\n",
    "pd.set_option('display.width', 80)\n",
    "\n",
    "# bring in matplotlib for graphics\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creating a Series using Python lists and dictionaries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    10\n",
       "1    11\n",
       "2    12\n",
       "3    13\n",
       "4    14\n",
       "dtype: int64"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a series of multiple values from a list\n",
    "s = pd.Series([10, 11, 12, 13, 14])\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# value stored at index label 3\n",
    "s[3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      Mike\n",
       "1    Marcia\n",
       "2    Mikael\n",
       "3      Bleu\n",
       "dtype: object"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a Series of alphas\n",
    "pd.Series(['Mike', 'Marcia', 'Mikael', 'Bleu'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    2\n",
       "1    2\n",
       "2    2\n",
       "3    2\n",
       "4    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a sequence of 5 values, all 2\n",
    "pd.Series([2]*5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    a\n",
       "1    b\n",
       "2    c\n",
       "3    d\n",
       "4    e\n",
       "dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# use each character as a value\n",
    "pd.Series(list('abcde'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bleu      Best doggie ever\n",
       "Marcia                 Mom\n",
       "Mikael                 Son\n",
       "Mike                   Dad\n",
       "dtype: object"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create Series from dict\n",
    "pd.Series({'Mike': 'Dad', \n",
    "           'Marcia': 'Mom', \n",
    "           'Mikael': 'Son', \n",
    "           'Bleu': 'Best doggie ever' })"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creation using NumPy functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    4\n",
       "1    5\n",
       "2    6\n",
       "3    7\n",
       "4    8\n",
       "dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 4 through 8\n",
    "pd.Series(np.arange(4, 9))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0.00\n",
       "1    2.25\n",
       "2    4.50\n",
       "3    6.75\n",
       "4    9.00\n",
       "dtype: float64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 0 through 9\n",
    "pd.Series(np.linspace(0, 9, 5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0   -0.204708\n",
       "1    0.478943\n",
       "2   -0.519439\n",
       "3   -0.555730\n",
       "4    1.965781\n",
       "dtype: float64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# random numbers\n",
    "np.random.seed(12345) # always generate the same values\n",
    "# 5 normally random numbers\n",
    "pd.Series(np.random.normal(size=5))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creation using a scalar value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create a one item Series\n",
    "s = pd.Series(2)\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0\n",
       "1    2\n",
       "2    4\n",
       "3    6\n",
       "4    8\n",
       "dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create the Series\n",
    "s = pd.Series(np.arange(0, 5))\n",
    "# multiple all values by 2\n",
    "s * 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# The .index and .values properties"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 2, 3])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get the values in the Series\n",
    "s = pd.Series([1, 2, 3])\n",
    "s.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "numpy.ndarray"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# show that this is a numpy array\n",
    "type(s.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=3, step=1)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get the index of the Series\n",
    "s.index"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# The size and shape of a Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# example series\n",
    "s = pd.Series([0, 1, 2, 3])\n",
    "len(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .size is also the # of items in the Series\n",
    "s.size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4,)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .shape is a tuple with one value\n",
    "s.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Specifying an index at creation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dad      Mike\n",
       "Mom    Marcia\n",
       "Son    Mikael\n",
       "Dog      Bleu\n",
       "dtype: object"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# explicitly create an index\n",
    "labels = ['Mike', 'Marcia', 'Mikael', 'Bleu']\n",
    "role = ['Dad', 'Mom', 'Son', 'Dog']\n",
    "s = pd.Series(labels, index=role)\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Dad', 'Mom', 'Son', 'Dog'], dtype='object')"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# examine the index\n",
    "s.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Mike'"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# who is the Dad?\n",
    "s['Dad']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Heads, tails and takes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# a ten item Series\n",
    "s = pd.Series(np.arange(1, 10), \n",
    "              index=list('abcdefghi'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    1\n",
       "b    2\n",
       "c    3\n",
       "d    4\n",
       "e    5\n",
       "dtype: int64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# show the first five\n",
    "s.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    1\n",
       "b    2\n",
       "c    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the first three\n",
    "s.head(n = 3) # s.head(3) is equivalent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "e    5\n",
       "f    6\n",
       "g    7\n",
       "h    8\n",
       "i    9\n",
       "dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the last five\n",
    "s.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "g    7\n",
       "h    8\n",
       "i    9\n",
       "dtype: int64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the last 3\n",
    "s.tail(n = 3) # equivalent to s.tail(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    2\n",
       "f    6\n",
       "i    9\n",
       "dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# only take specific items by position\n",
    "s.take([1, 5, 8])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Lookup by label using the [] and .ix[] operators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    10\n",
       "b    11\n",
       "c    12\n",
       "d    13\n",
       "e    14\n",
       "dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we will use this series to examine lookups\n",
    "s1 = pd.Series(np.arange(10, 15), index=list('abcde'))\n",
    "s1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get the value with label 'a'\n",
    "s1['a']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d    13\n",
       "b    11\n",
       "dtype: int64"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get multiple items\n",
    "s1[['d', 'b']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d    13\n",
       "b    11\n",
       "dtype: int64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# gets values based upon position\n",
    "s1[[3, 1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10    1\n",
       "11    2\n",
       "12    3\n",
       "13    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# to demo lookup by matching labels as integer values\n",
    "s2 = pd.Series([1, 2, 3, 4], index=[10, 11, 12, 13])\n",
    "s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13    4\n",
       "10    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this is by label not position\n",
    "s2[[13, 10]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Explicit position lookup with .iloc[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    10\n",
       "c    12\n",
       "dtype: int64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# explicitly  by position\n",
    "s1.iloc[[0, 2]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13    4\n",
       "12    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# explicitly  by position\n",
    "s2.iloc[[3, 2]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Explicit label lookup with .loc[]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    10\n",
       "d    13\n",
       "dtype: int64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# explicit via labels\n",
    "s1.loc[['a', 'd']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11    2\n",
       "12    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# get items at position 11 an d12\n",
    "s2.loc[[11, 12]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    10.0\n",
       "f     NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# -1 and 15 will be NaN\n",
    "s1.loc[['a', 'f']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Slicing a Series into subsets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10    100\n",
       "11    101\n",
       "12    102\n",
       "13    103\n",
       "14    104\n",
       "15    105\n",
       "16    106\n",
       "17    107\n",
       "18    108\n",
       "19    109\n",
       "dtype: int64"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a Series to use for slicing\n",
    "# using index labels not starting at 0 to demonstrate \n",
    "# position based slicing\n",
    "s = pd.Series(np.arange(100, 110), index=np.arange(10, 20))\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11    101\n",
       "12    102\n",
       "13    103\n",
       "14    104\n",
       "15    105\n",
       "dtype: int64"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# slice showing items at position 1 thorugh 5\n",
    "s[1:6]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11    101\n",
       "12    102\n",
       "13    103\n",
       "14    104\n",
       "15    105\n",
       "dtype: int64"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# lookup via list of positions\n",
    "s.iloc[[1, 2, 3, 4, 5]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11    101\n",
       "13    103\n",
       "15    105\n",
       "dtype: int64"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# items at position 1, 3, 5\n",
    "s[1:6:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10    100\n",
       "11    101\n",
       "12    102\n",
       "13    103\n",
       "14    104\n",
       "dtype: int64"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# first five by slicing, same as .head(5)\n",
    "s[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "14    104\n",
       "15    105\n",
       "16    106\n",
       "17    107\n",
       "18    108\n",
       "19    109\n",
       "dtype: int64"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fourth position to the end\n",
    "s[4:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10    100\n",
       "12    102\n",
       "14    104\n",
       "dtype: int64"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# every other item in the first five positions\n",
    "s[:5:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "14    104\n",
       "16    106\n",
       "18    108\n",
       "dtype: int64"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# every other item starting at the fourth position\n",
    "s[4::2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19    109\n",
       "18    108\n",
       "17    107\n",
       "16    106\n",
       "15    105\n",
       "14    104\n",
       "13    103\n",
       "12    102\n",
       "11    101\n",
       "10    100\n",
       "dtype: int64"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# reverse the Series\n",
    "s[::-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "14    104\n",
       "12    102\n",
       "10    100\n",
       "dtype: int64"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# every other starting at position 4, in reverse\n",
    "s[4::-2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "16    106\n",
       "17    107\n",
       "18    108\n",
       "19    109\n",
       "dtype: int64"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# -4:, which means the last 4 rows\n",
    "s[-4:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10    100\n",
       "11    101\n",
       "12    102\n",
       "13    103\n",
       "14    104\n",
       "15    105\n",
       "dtype: int64"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# :-4, all but the last 4\n",
    "s[:-4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "16    106\n",
       "17    107\n",
       "18    108\n",
       "dtype: int64"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# equivalent to s.tail(4).head(3)\n",
    "s[-4:-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "b    1\n",
       "c    2\n",
       "d    3\n",
       "e    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# used to demonstrate the next two slices\n",
    "s = pd.Series(np.arange(0, 5), \n",
    "              index=['a', 'b', 'c', 'd', 'e'])\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    1\n",
       "c    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# slices by position as the index is characters\n",
    "s[1:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    1\n",
       "c    2\n",
       "d    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this slices by the strings in the index\n",
    "s['b':'d']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Alignment via index labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    1\n",
       "b    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First series for alignment\n",
    "s1 = pd.Series([1, 2], index=['a', 'b'])\n",
    "s1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    4\n",
       "a    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Second series for alignment\n",
    "s2 = pd.Series([4, 3], index=['b', 'a'])\n",
    "s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    4\n",
       "b    6\n",
       "dtype: int64"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# add them\n",
    "s1 + s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "b    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# multiply all values in s3 by 2\n",
    "s1 * 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "b    2\n",
       "dtype: int64"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# scalar series using s3's index\n",
    "t = pd.Series(2, s1.index)\n",
    "t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "b    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# multiply s1 by t\n",
    "s1 * t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    5\n",
       "c    6\n",
       "dtype: int64"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we will add this to s1\n",
    "s3 = pd.Series([5, 6], index=['b', 'c'])\n",
    "s3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    NaN\n",
       "b    7.0\n",
       "c    NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# s1 and s3 have different sets of index labels\n",
    "# NaN will result for a and c\n",
    "s1 + s3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    1.0\n",
       "a    2.0\n",
       "b    3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2 'a' labels\n",
    "s1 = pd.Series([1.0, 2.0, 3.0], index=['a', 'a', 'b'])\n",
    "s1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    4.0\n",
       "a    5.0\n",
       "c    6.0\n",
       "a    7.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 3 a labels\n",
    "s2 = pd.Series([4.0, 5.0, 6.0, 7.0], index=['a', 'a', 'c', 'a'])\n",
    "s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    5.0\n",
       "a    6.0\n",
       "a    8.0\n",
       "a    6.0\n",
       "a    7.0\n",
       "a    9.0\n",
       "b    NaN\n",
       "c    NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# will result in 6 'a' index labels, and NaN for b and c\n",
    "s1 + s2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Boolean selection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    False\n",
       "b    False\n",
       "c    False\n",
       "d     True\n",
       "e     True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# which rows have values that are > 5?\n",
    "s = pd.Series(np.arange(0, 5), index=list('abcde'))\n",
    "logical_results = s >= 3\n",
    "logical_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d    3\n",
       "e    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# select where True\n",
    "s[logical_results]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Series([], dtype: int64)"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a little shorter version\n",
    "s[s > 5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# commented as it throws an exception\n",
    "# s[s >= 2 and s < 5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "c    2\n",
       "d    3\n",
       "e    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# correct syntax\n",
    "s[(s >=2) & (s < 5)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# are all items >= 0?\n",
    "(s >= 0).all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# any items < 2?\n",
    "s[s < 2].any()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# how many values < 2?\n",
    "(s < 2).sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Reindexing a Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    0.469112\n",
       "1   -0.282863\n",
       "2   -1.509059\n",
       "3   -1.135632\n",
       "4    1.212112\n",
       "dtype: float64"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sample series of five items\n",
    "np.random.seed(123456)\n",
    "s = pd.Series(np.random.randn(5))\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.469112\n",
       "b   -0.282863\n",
       "c   -1.509059\n",
       "d   -1.135632\n",
       "e    1.212112\n",
       "dtype: float64"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# change the index\n",
    "s.index = ['a', 'b', 'c', 'd', 'e']\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.469112\n",
       "b   -0.282863\n",
       "c   -1.509059\n",
       "d   -1.135632\n",
       "dtype: float64"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# a series that we will reindex\n",
    "np.random.seed(123456)\n",
    "s1 = pd.Series(np.random.randn(4), ['a', 'b', 'c', 'd'])\n",
    "s1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.469112\n",
       "c   -1.509059\n",
       "g         NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# reindex with different number of labels\n",
    "# results in dropped rows and/or NaN's\n",
    "s2 = s1.reindex(['a', 'c', 'g'])\n",
    "s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0   NaN\n",
       "1   NaN\n",
       "2   NaN\n",
       "0   NaN\n",
       "1   NaN\n",
       "2   NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# different types for the same values of labels\n",
    "# causes big trouble\n",
    "s1 = pd.Series([0, 1, 2], index=[0, 1, 2])\n",
    "s2 = pd.Series([3, 4, 5], index=['0', '1', '2'])\n",
    "s1 + s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    3\n",
       "1    5\n",
       "2    7\n",
       "dtype: int64"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# reindex by casting the label types\n",
    "# and we will get the desired result\n",
    "s2.index = s2.index.values.astype(int)\n",
    "s1 + s2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.469112\n",
       "f    0.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fill with 0 instead of NaN\n",
    "s2 = s.copy()\n",
    "s2.reindex(['a', 'f'], fill_value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      red\n",
       "3    green\n",
       "5     blue\n",
       "dtype: object"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# create example to demonstrate fills\n",
    "s3 = pd.Series(['red', 'green', 'blue'], index=[0, 3, 5])\n",
    "s3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      red\n",
       "1      red\n",
       "2      red\n",
       "3    green\n",
       "4    green\n",
       "5     blue\n",
       "6     blue\n",
       "dtype: object"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# forward fill example\n",
    "s3.reindex(np.arange(0,7), method='ffill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      red\n",
       "1    green\n",
       "2    green\n",
       "3    green\n",
       "4     blue\n",
       "5     blue\n",
       "6      NaN\n",
       "dtype: object"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# backwards fill example\n",
    "s3.reindex(np.arange(0,7), method='bfill')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Modifying a Series in-place"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.469112\n",
       "b   -0.282863\n",
       "c   -1.509059\n",
       "dtype: float64"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# generate a Series to play with\n",
    "np.random.seed(123456)\n",
    "s = pd.Series(np.random.randn(3), index=['a', 'b', 'c'])\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a      0.469112\n",
       "b     -0.282863\n",
       "c     -1.509059\n",
       "d    100.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# change a value in the Series\n",
    "# this is done in-place\n",
    "# a new Series is not returned that has a modified value\n",
    "s['d'] = 100\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a      0.469112\n",
       "b     -0.282863\n",
       "c     -1.509059\n",
       "d   -100.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# modify the value at 'd' in-place\n",
    "s['d'] = -100\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b     -0.282863\n",
       "c     -1.509059\n",
       "d   -100.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# remove a row / item\n",
    "del(s['a'])\n",
    "s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b   -0.282863\n",
       "c   -1.509059\n",
       "dtype: float64"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "copy = s.copy() # preserve s\n",
    "slice = copy[:2] # slice with first two rows\n",
    "slice"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b      0.000000\n",
       "c     -1.509059\n",
       "d   -100.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# change item with label 10 to 1000\n",
    "slice['b'] = 0\n",
    "# and see it in the source\n",
    "copy"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
